##CONTENTS
#Representativeness of Respondents from Abortion Experiment
#Abortion Results

rm(list = ls())
library(foreign)
library(plyr)
library(sandwich)
library(data.table)
library(lmtest)
library(ggplot2)
library(grid)
library(reshape2)
library(SDMTools)
library(xtable)
library(extrafont)

setwd("")

#Representativeness of Respondents from Abortion Experiment
data <- read.dta('data/abortion_anon.dta')
abortion.rep.df <- matrix(NA, nrow=60, ncol=7)
abortion.rep.df[,1] <- c(rep("vf_female",5), rep("vf_white",5), rep("vf_hispanic",5), rep("vf_nophone",5),
                         rep("vf_asian",5), rep("vf_dem",5), rep("vf_rep",5),
                         rep("vg_14",5), rep("vg_12",5),
                         rep("vf_under35",5), rep("vf_under35to60",5), rep("vf_over60",5))
abortion.rep.df[,2] <- c(rep("% Female",5), rep("% White",5), rep("% Hispanic",5), rep("% without Phone",5),
                         rep("% Asian",5), rep("% Democrat",5), rep("% Republican",5),
                         rep("% Voted 2014",5), rep("% Voted 2012",5),
                         rep("% Age 18-34",5), rep("% Age 35-60",5), rep("% Age 60+",5))
abortion.rep.df[,3] <- rep(c("Mailed", "t0 Respondents", "Canvassed",
                             "t1 Respondents", "t2 Respondents"), 12)
  
for (i in 1:nrow(abortion.rep.df)) {
  #Calculate the means
 if (abortion.rep.df[i,3] == "Mailed") abortion.rep.df[i,4] <- mean(data[,abortion.rep.df[i,1]])
 if (abortion.rep.df[i,3] == "t0 Respondents") abortion.rep.df[i,4] <- mean(data[which(!is.na(data$respondent_t0)),abortion.rep.df[i,1]])
 if (abortion.rep.df[i,3] == "Canvassed") abortion.rep.df[i,4] <- mean(data[which(!is.na(data$canvassed)),abortion.rep.df[i,1]])
 if (abortion.rep.df[i,3] == "t1 Respondents") abortion.rep.df[i,4] <- mean(data[which(!is.na(data$respondent_t1)),abortion.rep.df[i,1]])
 if (abortion.rep.df[i,3] == "t2 Respondents") abortion.rep.df[i,4] <- mean(data[which(!is.na(data$respondent_t2)),abortion.rep.df[i,1]])
 #Calculate the SEM
 if (abortion.rep.df[i,3] == "Mailed") abortion.rep.df[i,5] <- sd(data[,abortion.rep.df[i,1]])/sqrt(length(data[,abortion.rep.df[i,1]])) 
 if (abortion.rep.df[i,3] == "t0 Respondents") abortion.rep.df[i,5] <- sd(data[which(!is.na(data$respondent_t0)),abortion.rep.df[i,1]])/sqrt(length(data[which(!is.na(data$respondent_t0)),abortion.rep.df[i,1]])) 
 if (abortion.rep.df[i,3] == "Canvassed") abortion.rep.df[i,5] <- sd(data[which(!is.na(data$canvassed)),abortion.rep.df[i,1]])/sqrt(length(data[which(!is.na(data$canvassed)),abortion.rep.df[i,1]]))
 if (abortion.rep.df[i,3] == "t1 Respondents") abortion.rep.df[i,5] <- sd(data[which(!is.na(data$respondent_t1)),abortion.rep.df[i,1]])/sqrt(length(data[which(!is.na(data$respondent_t1)),abortion.rep.df[i,1]]))
 if (abortion.rep.df[i,3] == "t2 Respondents") abortion.rep.df[i,5] <- sd(data[which(!is.na(data$respondent_t2)),abortion.rep.df[i,1]])/sqrt(length(data[which(!is.na(data$respondent_t2)),abortion.rep.df[i,1]]))
}
abortion.rep.df[,6] <- as.numeric(abortion.rep.df[,4]) - 1.96*as.numeric(abortion.rep.df[,5])
abortion.rep.df[,7] <- as.numeric(abortion.rep.df[,4]) + 1.96*as.numeric(abortion.rep.df[,5])
abortion.rep.df <- as.data.frame(abortion.rep.df)
abortion.rep.df <- rename(abortion.rep.df, c("V1" = "variable", "V2" = "string", "V3" = "stage", "V4" = "mean", 
                                             "V5" = "sem", "V6" = "lower", "V7" = "upper", "V8" = "stagecolor"))
abortion.rep.df$mean <- as.numeric(levels(abortion.rep.df$mean)[abortion.rep.df$mean])
abortion.rep.df$sem <- as.numeric(levels(abortion.rep.df$sem)[abortion.rep.df$sem])
abortion.rep.df$lower <- as.numeric(levels(abortion.rep.df$lower)[abortion.rep.df$lower])
abortion.rep.df$upper <- as.numeric(levels(abortion.rep.df$upper)[abortion.rep.df$upper])
#Reorder levels
abortion.rep.df$stage <-factor(abortion.rep.df$stage,levels(abortion.rep.df$stage)[c(5,4,1,3,2)])
abortion.rep.df$string <-factor(abortion.rep.df$string,levels(abortion.rep.df$string)[c(5,8:10,12,11,4,7,6,1:3)])

#Table OA5: Representativeness of survey respondents in abortion application study.
#This produces the numbers required for the table, not the formatted table itself. We only use a set of these variables in the table.
xtable(abortion.rep.df[,2:4], xdigits=3)
#N at each stage.
nrow(data) #Mailed
sum(!is.na(data$respondent_t0)) #t0 Respondent
sum(!is.na(data$canvassed)) #Canvassed
sum(!is.na(data$respondent_t1)) #t1 Respondent
sum(!is.na(data$respondent_t2)) #t2 Respondent

# dot plot
p <- ggplot(abortion.rep.df, aes(x = mean, xmin = lower, xmax = upper, y = stage)) +
  geom_point() +
  geom_errorbarh(aes(height = .5)) +
  facet_wrap(~ string, ncol = 3) +
  scale_x_continuous(breaks = seq(0, 1, .2), 
                     labels = c(0, "", .4, "", .8, "")) + 
  labs(x = "Proportion of Characteristic", y = NULL) + 
  theme_bw() +
  theme(panel.grid.major.y = element_line(colour = "grey60"), 
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.margin = unit(0, "lines"),
        strip.background = element_rect(fill = "grey80"),
        axis.ticks.y = element_blank(), 
        legend.position="none",
        text = element_text(family = 'Verdana')) +
  geom_vline(xintercept = 0)
p
#Figure OA2: Representativeness of Survey Respondents in Abortion Application Study
ggsave("output/abortion_representativeness_dotplot.tiff", 
       p, width = 6, height = 8)

#Abortion Results
data <- subset(data, data$respondent_t0==1)
# Compute factor analysis outcome
compute.factor.dv <- function(dv.names, respondent.booleans, print.loadings = TRUE){
  responders <- data[respondent.booleans,]
  # Factor analysis
  factor.obj <- princomp(responders[, dv.names], cor=TRUE)
  if(print.loadings) print(loadings(factor.obj))
  dv <- as.vector(factor.obj$scores[,1])
  # More positive values on the factor should indicate more tolerance; reverse otherwise.
  if(cor(dv, data$t0_abortion_factor[respondent.booleans], use="complete.obs") < 0) dv <- -1 * dv
  # Put in the order of the main data frame
  dv.in.order <- dv[match(data$anon_id, responders$anon_id)]
  # Rescale to mean 0 sd 1 in placebo group; treatment effects can then be interpreted
  # as the effect in standard deviations the treatment would have among an untreated
  # population.
  dv.in.order <- (dv.in.order - mean(dv.in.order[!data$treat_ind], na.rm=TRUE)) /
    sd(dv.in.order[!data$treat_ind], na.rm=TRUE)
  return(as.vector(dv.in.order))
}
# Compute the DVs in line with the above procedures.
all.dv.names.t1 <- c('t1_ballot_abortion', 't1_gssifthewomanshealthisse', 
                     't1_gssthewomanwantsitforany', 't1_gssifshebecamepregnantas', 
                     't1_gssifthereisastrongchanc', 't1_gssifthefamilyhasaverylo', 
                     't1_gssifsheisnotmarriedandd', 't1_gssifsheismarriedanddoes', 
                     't1_abort_itemitswomensfa', 't1_abort_itemwomenwhohav', 
                     't1_abort_itemwithmodernb', 't1_abort_itemtheresnothi', 
                     't1_abort_itemifawomanfee', 't1_abort_itemthegovernme', 
                     't1_abort_itemifafriendch')
all.dv.names.t2 <- c('t2_ballot_abortion', 't2_gssifthewomanshealthisse', 
                     't2_gssthewomanwantsitforany', 't2_gssifshebecamepregnantas', 
                     't2_gssifthereisastrongchanc', 't2_gssifthefamilyhasaverylo', 
                     't2_gssifsheisnotmarriedandd', 't2_gssifsheismarriedanddoes', 
                     't2_abort_itemitswomensfa', 't2_abort_itemwomenwhohav', 
                     't2_abort_itemwithmodernb', 't2_abort_itemtheresnothi', 
                     't2_abort_itemifawomanfee', 't2_abort_itemthegovernme', 
                     't2_abort_itemifafriendch', 't2_gssiftheyforgottousebirt')
policy.dv.names.t1 <- c('t1_ballot_abortion', 't1_gssifthewomanshealthisse', 
                        't1_gssthewomanwantsitforany', 't1_gssifshebecamepregnantas', 
                        't1_gssifthereisastrongchanc', 't1_gssifthefamilyhasaverylo', 
                        't1_gssifsheisnotmarriedandd', 't1_gssifsheismarriedanddoes', 
                        't1_abort_itemthegovernme')
policy.dv.names.t2 <- c('t2_ballot_abortion', 't2_gssifthewomanshealthisse', 
                        't2_gssthewomanwantsitforany', 't2_gssifshebecamepregnantas', 
                        't2_gssifthereisastrongchanc', 't2_gssifthefamilyhasaverylo', 
                        't2_gssifsheisnotmarriedandd', 't2_gssifsheismarriedanddoes', 
                        't2_abort_itemthegovernme', 't2_gssiftheyforgottousebirt')
stigma.dv.names.t1 <- c('t1_abort_itemitswomensfa', 't1_abort_itemwomenwhohav', 
                        't1_abort_itemwithmodernb', 't1_abort_itemtheresnothi', 
                        't1_abort_itemifawomanfee', 't1_abort_itemifafriendch')
stigma.dv.names.t2 <- c('t2_abort_itemitswomensfa', 't2_abort_itemwomenwhohav', 
                        't2_abort_itemwithmodernb', 't2_abort_itemtheresnothi', 
                        't2_abort_itemifawomanfee', 't2_abort_itemifafriendch')
# Omnibus DV of all primary outcomes.
data$all.dvs.t1 <- compute.factor.dv(all.dv.names.t1, data$respondent_t1==1 & !is.na(data$respondent_t1))
data$all.dvs.t2 <- compute.factor.dv(all.dv.names.t2, data$respondent_t2==1 & !is.na(data$respondent_t2))
data$all.dvs.t1.t2.avg <- (data$all.dvs.t1 + data$all.dvs.t2)/2
#Replace missing values with either their t1 or t2 value
data$all.dvs.t1.t2.avg[is.na(data$all.dvs.t1.t2.avg)] <- data$all.dvs.t1[is.na(data$all.dvs.t1.t2.avg)]
data$all.dvs.t1.t2.avg[is.na(data$all.dvs.t1.t2.avg)] <- data$all.dvs.t2[is.na(data$all.dvs.t1.t2.avg)]
data$all.dvs.t1.t2.avg <- data$all.dvs.t1.t2.avg / sd(data$all.dvs.t1.t2.avg[data$treat_ind==1],
                                                      na.rm=TRUE)
# Policy DVs
data$policy.dv.t1 <- compute.factor.dv(policy.dv.names.t1, data$respondent_t1==1 & !is.na(data$respondent_t1))
data$policy.dv.t2 <- compute.factor.dv(policy.dv.names.t2, data$respondent_t2==1 & !is.na(data$respondent_t2))
data$policy.dv.t1.t2.avg <- (data$policy.dv.t1 + data$policy.dv.t2)/2
#Replace missing values with either their t1 or t2 value
data$policy.dv.t1.t2.avg[is.na(data$policy.dv.t1.t2.avg)] <- data$policy.dv.t1[is.na(data$policy.dv.t1.t2.avg)]
data$policy.dv.t1.t2.avg[is.na(data$policy.dv.t1.t2.avg)] <- data$policy.dv.t2[is.na(data$policy.dv.t1.t2.avg)]
# Stigma DVs
data$stigma.dv.t1 <- compute.factor.dv(stigma.dv.names.t1, data$respondent_t1==1 & !is.na(data$respondent_t1))
data$stigma.dv.t2 <- compute.factor.dv(stigma.dv.names.t2, data$respondent_t2==1 & !is.na(data$respondent_t2))
data$stigma.dv.t1.t2.avg <- (data$stigma.dv.t1 + data$stigma.dv.t2)/2
#Replace missing values with either their t1 or t2 value
data$stigma.dv.t1.t2.avg[is.na(data$stigma.dv.t1.t2.avg)] <- data$stigma.dv.t1[is.na(data$stigma.dv.t1.t2.avg)]
data$stigma.dv.t1.t2.avg[is.na(data$stigma.dv.t1.t2.avg)] <- data$stigma.dv.t2[is.na(data$stigma.dv.t1.t2.avg)]
# Covariates
t0.covariate.names <- c("t0_ideology","t0_pid", "t0_catholic", "t0_religious", 
                        "t0_gssifthewomanshealthisse", "t0_gssthewomanwantsitforany", 
                        "t0_gssifshebecamepregnantas", "t0_gssifthereisastrongchanc", 
                        "t0_gssifthefamilyhasaverylo", "t0_gssifsheisnotmarriedandd", 
                        "t0_gssifsheismarriedanddoes",
                        "t0_abort_itemitswomensfa", "t0_abort_itemwomenwhohav", 
                        "t0_abort_itemwithmodernb", "t0_abort_itemtheresnothi", 
                        "t0_abort_itemifawomanfee", "t0_abort_itemthegovernme", 
                        "t0_abort_itemifafriendch", "t0_ballot_abortion",
                        "vf_female", "vf_hispanic", "vf_asian", "vf_age")
x <- data[,c(t0.covariate.names)]
x <- as.matrix(x, dimnames = list(NULL, names(x)))
# Function to compute clustered standard errors, from Mahmood Arai.
cl <- function(fm, cluster){
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum))
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL)
}
# Function for estimating ATE with regression adjustment and clustered standard errors
est.ate <- function(dv, include.obs){
  include.obs <- include.obs & !is.na(dv) # remove missing values so cl() doesn't break
  lm.result <- lm(dv[include.obs] ~ data$treat_ind[include.obs] + x[include.obs,])
  return(cl(lm.result, data$hh_id[include.obs])[2,1:2]) # Return just treatment coefficient.
}

est.ate.nocovars <- function(dv, include.obs){
  include.obs <- include.obs & !is.na(dv) # remove missing values so cl() doesn't break
  lm.result <- lm(dv[include.obs] ~ data$treat_ind[include.obs])
  return(cl(lm.result, data$hh_id[include.obs])[2,1:2]) # Return just treatment coefficient.
}

abortion.results.df <- matrix(NA, nrow=9, ncol=6)
  abortion.results.df[1,1:2] <- est.ate(data$all.dvs.t1, data$respondent_t1==1)
  abortion.results.df[2,1:2] <- est.ate(data$policy.dv.t1, data$respondent_t1==1)
  abortion.results.df[3,1:2] <- est.ate(data$stigma.dv.t1, data$respondent_t1==1)
  abortion.results.df[4,1:2] <- est.ate(data$all.dvs.t2, data$respondent_t2==1)
  abortion.results.df[5,1:2] <- est.ate(data$policy.dv.t2, data$respondent_t2==1)
  abortion.results.df[6,1:2] <- est.ate(data$stigma.dv.t2, data$respondent_t2==1)
  abortion.results.df[7,1:2] <- est.ate(data$all.dvs.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[8,1:2] <- est.ate(data$policy.dv.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[9,1:2] <- est.ate(data$stigma.dv.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[1,5:6] <- est.ate.nocovars(data$all.dvs.t1, data$respondent_t1==1)
  abortion.results.df[2,5:6] <- est.ate.nocovars(data$policy.dv.t1, data$respondent_t1==1)
  abortion.results.df[3,5:6] <- est.ate.nocovars(data$stigma.dv.t1, data$respondent_t1==1)
  abortion.results.df[4,5:6] <- est.ate.nocovars(data$all.dvs.t2, data$respondent_t2==1)
  abortion.results.df[5,5:6] <- est.ate.nocovars(data$policy.dv.t2, data$respondent_t2==1)
  abortion.results.df[6,5:6] <- est.ate.nocovars(data$stigma.dv.t2, data$respondent_t2==1)
  abortion.results.df[7,5:6] <- est.ate.nocovars(data$all.dvs.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[8,5:6] <- est.ate.nocovars(data$policy.dv.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[9,5:6] <- est.ate.nocovars(data$stigma.dv.t1.t2.avg, data$respondent_t1==1 | data$respondent_t2==1)
  abortion.results.df[,3] <- c("All DVs t1", "Policy DVs t1", "Stigma DVs t1",
                               "All DVs t2", "Policy DVs t2", "Stigma DVs t2",
                               "All DVs t1/t2 Avg", "Policy DVs t1/t2 Avg", "Stigma DVs t1/t2 Avg")
  abortion.results.df[,4] <- c(1,1,1,2,2,2,3,3,3)
abortion.results.df <- as.data.frame(abortion.results.df)
abortion.results.df <- rename(abortion.results.df, c("V1" = "beta", "V2" = "se", "V3" = "dv", "V4" = "color", "V5" = "betaNoCovar", "V6" = "seNoCovar"))
abortion.results.df$beta <- as.numeric(levels(abortion.results.df$beta)[abortion.results.df$beta])
abortion.results.df$se <- as.numeric(levels(abortion.results.df$se)[abortion.results.df$se])
abortion.results.df$betaNoCovar <- as.numeric(levels(abortion.results.df$betaNoCovar)[abortion.results.df$betaNoCovar])
abortion.results.df$seNoCovar <- as.numeric(levels(abortion.results.df$seNoCovar)[abortion.results.df$seNoCovar])
abortion.results.df$upper <- abortion.results.df$beta + 1.96 * abortion.results.df$se
abortion.results.df$lower <- abortion.results.df$beta - 1.96 * abortion.results.df$se
#Reorder levels
abortion.results.df$dv <-factor(abortion.results.df$dv,levels(abortion.results.df$dv)[c(1,4,7,3,6,9,2,5,8)])
# plot
ggplot(data = abortion.results.df, aes(x = dv, y = beta, ymin = lower, ymax = upper, color=color)) +
  geom_point() +
  geom_errorbar() +
  xlab("") + ylab("Treatment Effect\n(Standardized)") + 
  theme_bw() +
  geom_hline(yintercept=0, , colour="red", linetype="dashed") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  theme(legend.position="none", text = element_text(family = 'Verdana'))
#Figure 10: Treatment Effect Estimates of Canvassing on Abortion Attitudes
ggsave("output/abortion_results.tiff", width = 8, height = 3, unit = 'in')

#Table OA4: Estimated Treatment Effects in Abortion Study
#This produces the numbers required for the table, not the formatted table itself.
xtable(abortion.results.df[,c(3,1,2,5,6)], xdigits=4)
#Get observations
#t1
nobs(lm(data$all.dvs.t1~data$treat_ind + x))
#t2
nobs(lm(data$all.dvs.t2~data$treat_ind + x))
#t1/t2
nobs(lm(data$all.dvs.t1.t2.avg~data$treat_ind + x))

#Table OA6: Balance Check Among Survey Respondents
round(cl(lm(data[data$respondent_t0 == 1, "treat_ind"]~x[data$respondent_t0 == 1,]), data$hh_id[data$respondent_t0 == 1]), 3)
nobs(lm(data[data$respondent_t0 == 1, "treat_ind"]~x[data$respondent_t0 == 1,]))
round(summary(lm(data[data$respondent_t0 == 1, "treat_ind"]~x[data$respondent_t0 == 1,]))$r.squared, 3)
round(summary(lm(data[data$respondent_t0 == 1, "treat_ind"]~x[data$respondent_t0 == 1,]))$fstatistic, 3)
#Table OA7: Balance Check Among Compliers
round(cl(lm(data[data$canvassed == 1, "treat_ind"]~x[data$canvassed == 1,]), data$hh_id[!is.na(data$canvassed)]), 3)
nobs(lm(data[data$canvassed == 1, "treat_ind"]~x[data$canvassed == 1,]))
round(summary(lm(data[data$canvassed == 1, "treat_ind"]~x[data$canvassed == 1,]))$r.squared, 3)
round(summary(lm(data[data$canvassed == 1, "treat_ind"]~x[data$canvassed == 1,]))$fstatistic, 3)
#Table OA8: Balance Check Among First Post-Survey Respondents
round(cl(lm(data[data$respondent_t1 == 1, "treat_ind"]~x[data$respondent_t1 == 1,]), data$hh_id[!is.na(data$respondent_t1)]), 3)
nobs(lm(data[data$respondent_t1 == 1, "treat_ind"]~x[data$respondent_t1 == 1,]))
round(summary(lm(data[data$respondent_t1 == 1, "treat_ind"]~x[data$respondent_t1 == 1,]))$r.squared, 3)
round(summary(lm(data[data$respondent_t1 == 1, "treat_ind"]~x[data$respondent_t1 == 1,]))$fstatistic, 3)
#Table OA9: Balance Check Among Second Post-Survey Respondents
round(cl(lm(data[data$respondent_t2 == 1, "treat_ind"]~x[data$respondent_t2 == 1,]), data$hh_id[!is.na(data$respondent_t2)]), 3)
nobs(lm(data[data$respondent_t2 == 1, "treat_ind"]~x[data$respondent_t2 == 1,]))
round(summary(lm(data[data$respondent_t2 == 1, "treat_ind"]~x[data$respondent_t2 == 1,]))$r.squared, 3)
round(summary(lm(data[data$respondent_t2 == 1, "treat_ind"]~x[data$respondent_t2 == 1,]))$fstatistic, 3)

